import time

import scrapy
from scrapydemo.items import BookItem


class SpiderdemoSpider(scrapy.Spider):
    """Crawl a Douban book tag listing and emit one ``BookItem`` per book.

    ``parse`` handles the listing page named in ``start_urls`` and schedules
    a request for every book detail link it finds; ``parse_item`` handles
    each detail page and yields a populated ``BookItem``.
    """

    name = 'demo'
    allowed_domains = ['book.douban.com']
    # Entry page: the first URL fetched when the spider starts.
    start_urls = ['https://book.douban.com/tag/程序']
    # Throttle requests through Scrapy's downloader instead of calling
    # time.sleep() in a callback: sleeping blocks the reactor and stalls
    # every in-flight download. NOTE: custom_settings takes precedence over
    # a DOWNLOAD_DELAY defined in the project's settings module.
    custom_settings = {'DOWNLOAD_DELAY': 5}

    def parse(self, response):
        """Parse the tag listing page and schedule the book detail pages.

        :param response: response for a listing page
        :return: generator of ``scrapy.Request`` objects, one per book entry
            that has a detail link, each handled by ``parse_item``
        """
        # Every book on the listing page is one <li class="subject-item">.
        for entry in response.xpath(".//li[@class='subject-item']"):
            href = entry.xpath(
                ".//div[@class='info']/h2/a/@href").extract_first()
            if not href:
                # A malformed entry must not abort the rest of the page.
                self.logger.warning(
                    "Listing entry without a detail link on %s", response.url)
                continue
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the page URL.
            detail_url = response.urljoin(href)
            self.logger.debug("Scheduling detail page: %s", detail_url)
            yield scrapy.Request(url=detail_url,
                                 callback=self.parse_item)

    def parse_item(self, response):
        """Parse a book detail page.

        Fields whose node is missing from the page are set to ``None``
        rather than raising, so one incomplete page does not fail the
        callback.

        :param response: response for a book detail page
        :return: generator yielding a single ``BookItem`` with ``bookname``,
            ``score`` and ``author`` filled in
        """
        bookitem = BookItem()
        bookitem['bookname'] = response.xpath(
            ".//div[@id='wrapper']/h1/span/text()").extract_first()
        bookitem['score'] = response.xpath(
            ".//div[@class='subjectwrap clearfix']/div[2]//strong/text()"
        ).extract_first()
        bookitem['author'] = response.xpath(
            ".//div[@id='info']/span[1]/a/text()").extract_first()
        # Publication date and ISBN extraction is currently disabled; the
        # previous attempt selected text nodes by fixed position:
        # bookitem['pubdate'] = response.xpath(".//div[@id='info']/text()[9]").extract()[0]
        # bookitem['isbn'] = response.xpath(".//div[@id='info']/text()[19]").extract()[0]
        yield bookitem